First step to this assignment is to list all the libraries that will be used.

library(tidyverse)
## -- Attaching packages ------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ---------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(leaflet)
library("dplyr")
library("plyr")
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:plotly':
## 
##     arrange, mutate, rename, summarise
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
library("readr")
library(censusapi)
## 
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
## 
##     getFunction
library("rgdal")
## Loading required package: sp
## rgdal: version: 1.5-16, (SVN revision 1050)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/mirei/OneDrive/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/mirei/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-2
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library("sp")
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

After downloading the data, I created a stacked dataset that combines all the various csv files.

years <- 2017:2020
quarters <- 1:4 #there are four quarters in each year
types <- c("Electric", "Gas") #Electric and Gas each have different sections: residential, commercial, etc

pge_Elec_Gas_All <- NULL #creating the empty dataset

for (quarter in quarters){
  for (year in years){
    #since there are no quarter 3 and4 for year 2020 I need to make sure that the loop does not search for these specific quarters
    if (year == 2020 & quarter == 3){
      next()
    }
    if (year == 2020 & quarter == 4){
      next()
    }
    for (type in types){
      filename <-
        paste0(
          "PGE_",
          year,
          "_Q",
          quarter,
          "_",
          type,
          "UsageByZip.csv"
        )
    temp <- read_csv(filename)
temp$TOTALkBTU <- NULL #creating an empty column
temp$AVERAGEkBTU <- NULL #creating an empty column
temp$DATE <- as.yearmon(paste(temp$YEAR, temp$MONTH), "%Y %m")
if (type == "Gas"){ #calculating the kBTU (total and average) for gas
  temp$TOTALkBTU <- temp$TOTALTHM * 100 
  temp$AVERAGEkBTU <- temp$TOTALkBTU/temp$TOTALCUSTOMERS
  temp$TOTALTHM <- NULL
  temp$AVERAGETHM <- NULL
}
if (type == "Electric"){ #calculating the kBTU (total and average) for electricity
  temp$TOTALkBTU <- temp$TOTALKWH * 3.4121416
  temp$AVERAGEkBTU <- temp$TOTALkBTU/temp$TOTALCUSTOMERS
  temp$TOTALKWH <- NULL
  temp$AVERAGEKWH <- NULL
}
pge_Elec_Gas_All <- rbind(pge_Elec_Gas_All, temp)
    
    }
  }
saveRDS(pge_Elec_Gas_All, "pge_Elec_Gas_All.rds")  
}
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_double()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_double()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALKWH = col_number(),
##   AVERAGEKWH = col_number()
## )
## Parsed with column specification:
## cols(
##   ZIPCODE = col_double(),
##   MONTH = col_double(),
##   YEAR = col_double(),
##   CUSTOMERCLASS = col_character(),
##   COMBINED = col_character(),
##   TOTALCUSTOMERS = col_number(),
##   TOTALTHM = col_number(),
##   AVERAGETHM = col_number()
## )
head("pge_Elec_Gas_All.rds")
## [1] "pge_Elec_Gas_All.rds"

Once I have created the stacked dataset, I will filter the data to have only the Bay Area zipcodes.

options(tigris_use_cache = FALSE)
ca_counties <- counties("CA", cb = T, progress_bar = F)
#st_drivers()
bay_county_names <-
  c(
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma"
  )

bay_counties <-
  ca_counties %>%
  filter(NAME %in% bay_county_names)

usa_zips <- 
  zctas(cb = T, progress_bar = F)
#creates a dataframe of all bay area zipcodes
bay_zips <-
  usa_zips %>% 
  st_centroid() %>% 
  .[bay_counties, ] %>% 
  st_set_geometry(NULL) %>% 
  left_join(usa_zips %>% select(GEOID10)) %>% 
  st_as_sf() #brings back coordinate reference systems
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for longitude/
## latitude data
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## Joining, by = "GEOID10"
#now actually filtering the datasets that have the Bay Area zipcodes
bayAreaPGE<- subset(pge_Elec_Gas_All, pge_Elec_Gas_All$ZIPCODE %in% bay_zips$ZCTA5CE10)

After creating a dataset that only includes the Bay area zipcodes, I will filter the dataset to only include gas and electric for residential and commercial use.

bayAreaPGE_elec_final <-
  bayAreaPGE %>%
  # subset(pge_Elec_Gas_All, pge_Elec_Gas_All$ZIPCODE %in% bay_zips$ZCTA5CE10)%>%
  filter( #choose only these customer classes
    CUSTOMERCLASS %in%
      c(
        "Elec- Residential",
        "Elec- Commercial", "Gas- Residential", "Gas- Commercial" #I could add gas data in here but I would have to change the subset
      )

  ) %>%
  select(
    !c(COMBINED, AVERAGEkBTU)#don't include these columns
  ) %>%
  dplyr::group_by(DATE, CUSTOMERCLASS) %>% #grouping by residential or commercial gas/electric and by date/month
  dplyr::summarise(
    TOTALkBTU =
      sum(
        TOTALkBTU,
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  ) %>%
  mutate(
    AVERAGEkBTU =
      TOTALkBTU/TOTALCUSTOMERS
  )
## `summarise()` regrouping output by 'DATE' (override with `.groups` argument)
bayAreaPGE_elec_final
## # A tibble: 168 x 5
## # Groups:   DATE [42]
##    DATE      CUSTOMERCLASS        TOTALkBTU TOTALCUSTOMERS AVERAGEkBTU
##    <yearmon> <chr>                    <dbl>          <dbl>       <dbl>
##  1 Jan 2017  Elec- Commercial   5632400479.         174860      32211.
##  2 Jan 2017  Elec- Residential  4803652706.        2495640       1925.
##  3 Jan 2017  Gas- Commercial    4604768900           89496      51452.
##  4 Jan 2017  Gas- Residential  17948726200         2179812       8234.
##  5 Feb 2017  Elec- Commercial   4687467036.         165585      28309.
##  6 Feb 2017  Elec- Residential  3821658257.        2497198       1530.
##  7 Feb 2017  Gas- Commercial    3292140900           89236      36893.
##  8 Feb 2017  Gas- Residential  11845495100         2180941       5431.
##  9 Mar 2017  Elec- Commercial   4906893462.         161665      30352.
## 10 Mar 2017  Elec- Residential  3697269785.        2498373       1480.
## # ... with 158 more rows

Create the graph

Now that I have my dataset that shows the monthly total kBTUs of residential and commercial electricity and gas consumption for the Bay Area from 2017 to 2020 quarter 2, I will create a bar graph to visualize this.

pge_chart <-
  bayAreaPGE_elec_final %>% 
  ggplot() +
  geom_bar(
    aes(
      x = DATE %>% factor(),
      y = TOTALkBTU,
      fill = CUSTOMERCLASS
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Date",
    y = "kBTU",
    title = "PG&E Bay Area Monthly Electricity and Gas Usage, 2017-2020",
    fill = "Customer Class Type"
  ) + 
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)#to shift the dates so they're visible
    )

pge_chart

pge_chart %>% ggplotly()

Q:Comment on any observable changes in energy consumption that may be attributable to the COVID-19 pandemic. A: Looking at the total kBTUs for March from 2019 to 2020, the electricity usage from commercial decreased by 357.5 million kBTU (total consumption) which could be due to businesses having to close down towards the middle of March. In regards to residential, electricity usage increased by roughly 150=1.2 million kBTU (total kBTU) and gas usage also increased 96.6 million kBTU. This increase in consumption can be due to people being laid off and activities being shut down which led to people staying at home longer than usual.

For the second part of the assignment, I created a dataset that calculates the change of electricity consumption due to COVID-19. I assumed that the best way to do this was to find the change of electricity consumption by subtracting 2020’s data with 2019’s data and dividing that value by 2019’s data value. I also assumed that the way to look at neighborhoods who experienced the greatest change in electricity consumption was to focus on residential consumption specifically as oppose to all of electricity consumption. I decided to keep the values as kBTU to make it easier to analyze. I removed any values that had 0 for 2019’s electricity residential usage.

bayAreaCOVID <-
  bayAreaPGE %>%
  filter(
    YEAR %in% (2019:2020), MONTH %in% (1:5), CUSTOMERCLASS == "Elec- Residential"
    ) %>%
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
    ) %>%
  dplyr::group_by(ZIPCODE, YEAR) %>%
  dplyr::summarise(
    TOTALkBTU =
      sum(
        TOTALkBTU, 
        na.rm = T
      )
  ) %>%
  right_join(
    bay_zips %>% select(GEOID10),
    by = c("ZIPCODE" = "GEOID10")
  ) %>%
  pivot_wider(
    names_from = YEAR,
    names_prefix = "kBTU",
    values_from = TOTALkBTU
  ) %>%
  dplyr::filter(kBTU2019> 0) %>%
  mutate(
    COVID_influence =
      ((kBTU2019 - kBTU2020)/ kBTU2019) * 100
  ) %>%
  st_as_sf()%>%
  st_transform(4326)
## `summarise()` regrouping output by 'ZIPCODE' (override with `.groups` argument)
bayAreaCOVID
## Simple feature collection with 267 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -123.5335 ymin: 36.89303 xmax: -121.3083 ymax: 38.93713
## geographic CRS: WGS 84
## # A tibble: 267 x 6
## # Groups:   ZIPCODE [267]
##    ZIPCODE                     geometry kBTU2019 kBTU2020 kBTUNA COVID_influence
##  * <chr>             <MULTIPOLYGON [°]>    <dbl>    <dbl>  <dbl>           <dbl>
##  1 94002   (((-122.3359 37.50805, -122~   7.59e7   7.75e7     NA         -2.18  
##  2 94005   (((-122.442 37.69435, -122.~   1.28e7   1.33e7     NA         -4.33  
##  3 94010   (((-122.4126 37.58916, -122~   1.65e8   1.69e8     NA         -2.17  
##  4 94014   (((-122.4718 37.70225, -122~   9.65e7   1.01e8     NA         -5.17  
##  5 94015   (((-122.4983 37.70813, -122~   1.27e8   1.33e8     NA         -4.77  
##  6 94019   (((-122.5113 37.52301, -122~   4.22e7   4.22e7     NA          0.0134
##  7 94020   (((-122.3284 37.3143, -122.~   7.97e6   8.17e6     NA         -2.51  
##  8 94022   (((-122.1855 37.32374, -122~   9.57e7   9.69e7     NA         -1.30  
##  9 94024   (((-122.1416 37.34951, -122~   8.78e7   9.00e7     NA         -2.50  
## 10 94025   (((-122.2291 37.4243, -122.~   1.33e8   1.38e8     NA         -3.90  
## # ... with 257 more rows

Once I created the dataset, I created the map to visualize the data.

res_pal <- colorNumeric(
  palette = "Blues",
  domain =
    bayAreaCOVID$COVID_influence
)

leaflet() %>% #leaflet wants a spatial object
  addTiles() %>%
  addPolygons(
    data = bayAreaCOVID,
    fillColor = ~res_pal(COVID_influence),
    color = "white",
    opacity = 0.5,
    fillOpacity = 0.5,
    weight = 1,
    label = ~paste0(
      round(COVID_influence),
      "kBTU Percent Change in",
      ZIPCODE
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  )%>%
  addLegend(
    data = bayAreaCOVID,
    pal = res_pal,
    values = ~COVID_influence,
    title = "Percent Change in Residential<br>Electric Energy Usage possibly<br>due to COVID-19"
)

Based on this map, the neighborhood that experienced the greatest change was a neighborhood in Oakland with a 63% change.